Week 9 Lecture Commands

Author

Lianne Sheppard for ENVH 556 Winter 2024

This document was rendered on September 25, 2024.

Slides 8 & 9: AR(1) process example, \(\alpha\)=0.5

#---- AR(1) example----

# function to generate an AR(1) series.  Includes units in function
ar_data <- function(n = 500, interval = 1, units = "seconds", alpha=0.5){
  
  # function arguments
    # n: integer, desired output dataframe length
    # interval: integer specifying the number between timestamps
    # units: character specifying the lubridate period function (e.g. "minutes")
    # alpha: numeric value less than 1 specifying the AR alpha term.
  
  # function requires lubridate and tsibble
  pacman::p_load(lubridate, tsibble)
  
  # use match.fun() to find the lubridate function specifying time units
  input_units <- match.fun(units)
  
  # generate time sequence
  x <- seq(1:n)*interval %>% input_units()
  
  # generate e and y
  e <- rnorm(n)
  y <- double(n)
  y[1] <- 0
  for (i in 2:n) {y[i] <- alpha*y[i-1] + e[i]}

  # make a `tsibble`
  ts_ex_data <- data.frame(time = x, y) %>% as_tsibble(index = time) 
  
  # explicit return
  return(ts_ex_data)
  
}


# Alternative to function:  Manual creation of an AR(1) time series
# # n is length of time series
# n <- 500
# # generate data with alpha = .5
# e <- rnorm(n)
# y <- double(n)
# y[1] <- 0
# for (i in 2:n) {y[i] <- 0.5*y[i-1] + e[i]}

set.seed(20)

# create data 
ts_ex_data <- ar_data() 

# plot time series
ggplot() + 
  geom_line(data = ts_ex_data, aes(x = time, y = y)) +
  labs(title = "AR(1) time series",
       x = "Time axis",
       y = "Outcome")

# plot autocorrelation
ts_ex_data %>% ACF(y) %>% autoplot()

# plot partial autocorrelation
ts_ex_data %>% PACF(y) %>% autoplot()

Slide10: AR(1) process example, \(\alpha\)=0.9

#---- AR(1) example2----

# create data 
ts_ex_data <- ar_data(alpha = 0.9) 

# plot time series
ggplot() + 
  geom_line(data = ts_ex_data, aes(x = time, y = y)) +
  labs(title = "AR(1) time series, alpha=0.9",
       x = "Time axis",
       y = "Outcome")

# plot autocorrelation
ts_ex_data %>% ACF(y) %>% autoplot()

# plot partial autocorrelation
ts_ex_data %>% PACF(y) %>% autoplot()

Slide 13: Moving average example

#---- MA(2) process example----

# n is length of time series
n <- 500

# generate data with rho1 = .0, rho2 = .8
e <- rnorm(n)
y <- double(n)
y[1] <- e[1]
y[2] <- e[2] + 0.8*e[1]
for (i in 3:n) {y[i] <- e[i] + 0.8*e[i-1] + 0.8*e[i-2]}
 
# make a tsibble 
ts_ex_data <- tibble(y, time = seq(1,n)) %>%
  as_tsibble(index = time)

# plot time series
ggplot() + 
  geom_line(data = ts_ex_data, aes(x = time, y = y)) +
  labs(title = "MA(2) time series",
       x = "Time axis",
       y = "Outcome")

# plot autocorrelation
ts_ex_data %>% ACF(y) %>% autoplot()

# plot partial autocorrelation
ts_ex_data %>% PACF(y) %>% autoplot()

Slide 14: ARMA(1,1)

#---- ARMA(1,1) ----

set.seed(200)

# n is length of time series
n <- 500

# generate data with alpha=0.9, rho = .8
e <- rnorm(n)
y <- double(n)
y[1] <- e[1]
for (i in 2:n) {y[i] <- 0.9*y[i-1] + e[i] + 0.8*e[i-1] }
 
# make a tsibble 
ts_ex_data <- tibble(y, time = seq(1,n)) %>%
  as_tsibble(index = time)

# plot time series
ggplot() + 
  geom_line(data = ts_ex_data, aes(x = time, y = y)) +
  labs(title = "ARMA(1,1) time series",
       x = "Time axis",
       y = "Outcome")

# plot autocorrelation
ts_ex_data %>% ACF(y) %>% autoplot()

# plot partial autocorrelation
ts_ex_data %>% PACF(y) %>% autoplot()

Not a slide: Time plots with smoother overlaid

The next chunk shows the slider function and a user-written moving function that can present a running 10th percentile.

#---- Time plot with smoother ----

# Using slider function with mean; the lab shows it with the quantile
ts_ex_data <- ts_ex_data %>%
  mutate(slide_20 = slide_dbl(ts_ex_data$y, ~mean(.x, na.rm = TRUE), .before = 10, .after = 10) )


# User-written moving window function:  Functionally this is the same as the
# slide function; seeing the code allows you to see how it can be constructed.
# Code found here:
# https://stackoverflow.com/questions/743812/calculating-moving-average
moving_fn <- function(x, w, fun, side, ...) {
  # x = vector with numeric data
  # w = window length
  # fun = function to apply
  # side = side to take, (c)entre, (l)eft or (r)ight
  # ... = parameters passed on to 'fun'
  y <- numeric(length(x))
  for (i in seq_len(length(x))) {
    if (side %in% c("c", "centre", "center")) {
      ind <- c((i - floor(w / 2)):(i + floor(w / 2)))
    } else if (side %in% c("l", "left")) {
      ind <- c((i - floor(w) + 1):i)
    } else if (side %in% c("r", "right")) {
      ind <- c(i:(i + floor(w) - 1))
    } else {
      stop("'side' must be one of 'centre', 'left', 'right'", call. = FALSE)
    }
    ind <- ind[ind %in% seq_len(length(x))]
    y[i] <- fun(x[ind], ...)
  }
  y
}

# and now create any variation you can think of!
moving_average <- function(x, w = 5, side = "centre", na.rm = FALSE) {
  moving_fn(x = x, w = w, fun = mean, side = side, na.rm = na.rm)
}

moving_sum <- function(x, w = 5, side = "centre", na.rm = FALSE) {
  moving_fn(x = x, w = w, fun = sum, side = side, na.rm = na.rm)
}

moving_maximum <- function(x, w = 5, side = "centre", na.rm = FALSE) {
  moving_fn(x = x, w = w, fun = max, side = side, na.rm = na.rm)
}

moving_median <- function(x, w = 5, side = "centre", na.rm = FALSE) {
  moving_fn(x = x, w = w, fun = median, side = side, na.rm = na.rm)
}

moving_Q1 <- function(x, w = 5, side = "centre", na.rm = FALSE) {
  moving_fn(x = x, w = w, fun = quantile, side = side, na.rm = na.rm, 0.25)
}

moving_Q3 <- function(x, w = 5, side = "centre", na.rm = FALSE) {
  moving_fn(x = x, w = w, fun = quantile, side = side, na.rm = na.rm, 0.75)
}

moving_10th <- function(x, w = 5, side = "centre", na.rm = FALSE) {
  moving_fn(x = x, w = w, fun = quantile, side = side, na.rm = na.rm, 0.10)
}

# Test rolling background:
roll10_y <- moving_10th(ts_ex_data$y, w = 10, side = "c")

# Show the results on a plot
ts_ex_data %>%
  bind_cols(roll10_y) %>%
  ggplot(aes(time, y)) +
  geom_point(size = 1, alpha = .5, color = "grey") +
  geom_line(aes(time, slide_20), color="red", lty=1) +
  geom_line(aes(time, roll10_y), color="blue", lty=2) +
  labs(title = "ARMA(1,1) time series\n slider & rolling 10th %tile",
       x = "Time axis",
       y = "Outcome")
New names:
• `` -> `...4`

Loess and kernel smoother examples

Examples in this section follow the nice exposition by Rafael A. Irizarry in his Introduction to Data Science on bin smoothing and loess for how kernel smoothers and loess work. It is worth taking a look at his animated graphics that show the repeated estimation across the length of the data.

Slide 18: 4 example plots with different kernel smoothers

#---- bin and kernel smoothing ----

# kernel smoothing, box and normal kernel
# note the Gaussian is a smoother function, even for the same span
span <- 10 
fit_ru <- with(ts_ex_data, 
            ksmooth(time, y, kernel = "box", bandwidth = span))
fit_rw <- with(ts_ex_data, 
            ksmooth(time, y, kernel = "normal", bandwidth = span))
span <- 50
fit_su <- with(ts_ex_data, 
            ksmooth(time, y, kernel = "box", bandwidth = span))
fit_sw <- with(ts_ex_data, 
            ksmooth(time, y, kernel = "normal", bandwidth = span))

# 4 separate plots for now:
# Rough, unweighted
ts_ex_data %>% mutate(smooth = fit_ru$y) %>%
  ggplot(aes(time, y)) +
  geom_point(size = 1, alpha = .5, color = "grey") +
  geom_line(aes(time, smooth), color="red") +
  labs(title = "ARMA(1,1) time series\n box smooth (span 10)",
       x = "Time axis",
       y = "Outcome")

# Rough, weighted
ts_ex_data %>% mutate(smooth = fit_rw$y) %>%
  ggplot(aes(time, y)) +
  geom_point(size = 1, alpha = .5, color = "grey") +
  geom_line(aes(time, smooth), color="red")+
  labs(title = "ARMA(1,1) time series\n Gaussian smooth (span 10)",
       x = "Time axis",
       y = "Outcome")

# Smooth, unweighted
ts_ex_data %>% mutate(smooth = fit_su$y) %>%
  ggplot(aes(time, y)) +
  geom_point(size = 1, alpha = .5, color = "grey") +
  geom_line(aes(time, smooth), color="red") +
  labs(title = "ARMA(1,1) time series\n box smooth (span 50)",
       x = "Time axis",
       y = "Outcome")

# Smooth, weighted
ts_ex_data %>% mutate(smooth = fit_sw$y) %>%
  ggplot(aes(time, y)) +
  geom_point(size = 1, alpha = .5, color = "grey") +
  geom_line(aes(time, smooth), color="red") +
  labs(title = "ARMA(1,1) time series\n Gaussian smooth (span 50)",
       x = "Time axis",
       y = "Outcome")

Slide 20: loess

Loess is a locally weighted regression model fit over a small span. The default in loewss is a 2nd degree polynomial, rather than a linear regression line.

#---- loess example ----

# first convert to a tibble
ts_ex_data <- ts_ex_data %>% tibble()

# choose a span based on the length of the data
# default span is .75; we are using .1 (50/500)
span <- .25

# consider two different loess smoothers, linear and polynomial
fit1 <- loess(y ~ time, degree=1, span = span, data=ts_ex_data)
# the default is degree = 2
fit2 <- loess(y ~ time, span = span, data=ts_ex_data)

ts_ex_data %>% mutate(smooth1 = fit1$fitted,
                      smooth2 = fit2$fitted) %>%
  ggplot(aes(time, y)) +
  geom_point(size = 1, alpha = .5, color = "grey") +
  geom_line(aes(time, smooth1), color="red", lty = 1) +
  geom_line(aes(time, smooth2), color="blue", lty = 2) +
  labs(title = "ARMA(1,1) time series\n with loess smooth (span .25)",
       x = "Time axis",
       y = "Outcome")

# short span
span <- .1

# consider two different loess smoothers, linear and polynomial
fit1 <- loess(y ~ time, degree=1, span = span, data=ts_ex_data)
# the default is degree = 2
fit2 <- loess(y ~ time, span = span, data=ts_ex_data)

ts_ex_data %>% mutate(smooth1 = fit1$fitted,
                      smooth2 = fit2$fitted) %>%
  ggplot(aes(time, y)) +
  geom_point(size = 1, alpha = .5, color = "grey") +
  geom_line(aes(time, smooth1), color="red", lty = 1) +
  geom_line(aes(time, smooth2), color="blue", lty = 2) +
  labs(title = "ARMA(1,1) time series\n with loess smooth (span .1)",
       x = "Time axis",
       y = "Outcome")

# long span
span <- .75

# consider two different loess smoothers, linear and polynomial
fit1 <- loess(y ~ time, degree=1, span = span, data=ts_ex_data)
# the default is degree = 2
fit2 <- loess(y ~ time, span = span, data=ts_ex_data)

ts_ex_data %>% mutate(smooth1 = fit1$fitted,
                      smooth2 = fit2$fitted) %>%
  ggplot(aes(time, y)) +
  geom_point(size = 1, alpha = .5, color = "grey") +
  geom_line(aes(time, smooth1), color="red", lty = 1) +
  geom_line(aes(time, smooth2), color="blue", lty = 2) +
  labs(title = "ARMA(1,1) time series\n with loess smooth (span .75)",
       x = "Time axis",
       y = "Outcome")

Slide 21: Smooth vs. rough decomposition

#---- smooth vs. rough ----

# after minimal smoothing -- long span
span <- .75

fit1 <- loess(y ~ time, degree=1, span = span, data=ts_ex_data)
ts_ex_data %>% mutate(smooth1 = fit1$fitted,
                      resid1  = y - fit1$fitted) %>%
  ggplot(aes(time, resid1)) +
  geom_point(size = .75, alpha = .5, color = "black") +
  geom_abline(intercept=0, slope = 0, color="red", lty = 1) +
  labs(title = "ARMA(1,1) residual time series\n light smoothing removed",
       x = "Time axis",
       y = "Residual Outcome")

# after more aggressive smoothing -- wigglier curve
span <- .1

fit1 <- loess(y ~ time, degree=1, span = span, data=ts_ex_data)

ts_ex_data %>% mutate(smooth1 = fit1$fitted,
                      resid1  = y - fit1$fitted) %>%
  ggplot(aes(time, resid1)) +
  geom_point(size = .75, alpha = .5, color = "black") +
  geom_abline(intercept=0, slope = 0, color="red", lty = 1) +
  labs(title = "ARMA(1,1) residual time series\n wiggly smoother removed",
       x = "Time axis",
       y = "Residual Outcome")

Slide 22: Differencing

#---- Differencing ----

# first difference:
ts_ex_diff <- ts_ex_data %>% 
    mutate(diff = y - lag(y, default = first(y), order_by = time))

#ts_ex_diff

# plot the first differences and check how smooth they are:
span = 0.75
fit1 <- loess(y ~ time, degree=1, span = span, data=ts_ex_data)
ts_ex_diff %>% mutate(smooth1 = fit1$fitted,
                      resid1  = y - fit1$fitted) %>%
  ggplot(aes(time, diff)) +
  geom_point(size = .75, alpha = .5, color = "black") +
  geom_line(aes(time, smooth1), color="red", lty = 1) +
  labs(title = "ARMA(1,1) first differences\n w/ loess smooth (span .75)",
       x = "Time axis",
       y = "Residual Outcome")

Spline smoothing

TODO: Use this section to discus the difference between smoothing and regression splines and show some example smooths.